
* Close any log that is still open, then start a fresh text log for this step.
capture log close
log using "$log/01_identify_training_end", text replace

* Load only the spell-level variables used below.
use beh_pers_gr spell_length persnr begorig endorig year beh_beruf_num betnr  beh_ausbildung sex_id beh_staat_dummy age spell using  $BeH_large, clear
set seed 666

* Restrict the sample to person groups 102 and 141.
keep if inlist(beh_pers_gr, 102, 141)

* Persons observed with exactly one spell are removed when that spell is
* shorter than 183 days (about six months).
local min_days 183
bysort persnr: gen long n_spells = _N
drop if n_spells == 1 & spell_length < `min_days'
drop n_spells

* Drop enclosed and parallel spells: an observation is removed when the
* observation directly above it belongs to the same person and year, starts
* no later, and ends no earlier. Only neighbouring rows are compared, so the
* pass is repeated until it removes nothing.
sort persnr begorig endorig
local n_dropped = 1
while `n_dropped' > 0 {
    local n_before = _N
    drop if persnr == persnr[_n-1] ///
        & year == year[_n-1] ///
        & begorig >= begorig[_n-1] ///
        & endorig <= endorig[_n-1]
    * number of observations removed in this pass
    local n_dropped = `n_before' - _N
}

*******************************************************************************
* Build the 2-digit occupation code and attach establishment information

* beruf is beh_beruf_num without its last digit; negative codes and the codes
* 981/982 are collapsed into -1.
gen beruf = cond(beh_beruf_num < 0 | inlist(beh_beruf_num, -5, 981, 982), -1, floor(beh_beruf_num/10))

* Add bula and ind by establishment number; establishments that appear only
* in the using file are not kept.
merge m:1 betnr using "$temp/wz_bula.dta", keepusing(bula ind) keep(master match) nogenerate

* Characteristics of each person's first training spell.
* Spells are ordered within person by start date (ties broken by end date),
* so observation 1 of every person is the chronologically first spell and
* var[1] picks up its value. Taking the value of that single observation
* (rather than a minimum over several spells) matters for the categorical
* codes bula, ind and beruf, where the smallest code has no meaning.
bysort persnr (begorig endorig): gen train_start_age = age[1]
by persnr (begorig endorig): gen train_start_year = year[1]
by persnr (begorig endorig): gen train_start_bula = bula[1]
by persnr (begorig endorig): gen train_start_ind = ind[1]
by persnr (begorig endorig): gen first_train_beruf = beruf[1]

* Combine spells within year if same betnr and 2-digit-job
sort persnr begorig betnr beruf

* tag == 1 marks a spell that continues the spell directly above it: same
* person, same establishment, compatible occupation (equal, or one of the two
* is the unknown code -1), same year, and the spell above ended no more than
* 90 days before this one starts.
gen tag = 1 if ///
persnr == persnr[_n-1] & ///
betnr == betnr[_n-1] & ///
(beruf == beruf[_n-1] | beruf==-1 | beruf[_n-1]==-1) ///
& endorig[_n-1] + 91 >= begorig /// allow for 90 day break between spells
& year == year[_n-1] 

* The continuing spell takes over the start date of the spell above; replace
* works top-down, so the start date is carried through chains of tagged spells.
replace begorig = begorig[_n-1] if tag==1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation (-1) or missing education (-7) from the spell
* above, but only when that spell actually carries a known value.
replace beruf = beruf[_n-1] if beruf==-1 & beruf[_n-1]!=-1 & tag==1
replace beh_ausbildung = beh_ausbildung[_n-1] if beh_ausbildung==-7 & beh_ausbildung[_n-1]!=-7 & tag==1
* The spell above is now contained in the combined spell and is removed.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells within year if same 2-digit-job

sort persnr begorig beruf

* tag == 1 marks a spell that continues the spell directly above it: same
* person, compatible occupation (equal, or one of the two is the unknown
* code -1), same year, and the spell above ended no more than 90 days before
* this one starts. The establishment may differ.
gen tag = 1 if ///
persnr == persnr[_n-1] & ///
(beruf == beruf[_n-1] | beruf==-1 | beruf[_n-1]==-1) ///
& endorig[_n-1] + 91 >= begorig  /// allow for 90 day break between spells
& year == year[_n-1] 

* The continuing spell takes over the start date of the spell above; replace
* works top-down, so the start date is carried through chains of tagged spells.
replace begorig = begorig[_n-1] if tag==1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation (-1) or missing education (-7) from the spell
* above, but only when that spell actually carries a known value.
replace beruf = beruf[_n-1] if beruf==-1 & beruf[_n-1]!=-1 & tag==1
replace beh_ausbildung = beh_ausbildung[_n-1] if beh_ausbildung==-7 & beh_ausbildung[_n-1]!=-7 & tag==1
* The spell above is now contained in the combined spell and is removed.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells over year if same betnr and 2-digit-job

sort persnr begorig betnr beruf

* tag == 1 marks a spell that continues the spell directly above it: same
* person, same establishment, compatible occupation (equal, or one of the two
* is the unknown code -1), and the spell above ended no more than 90 days
* before this one starts. The calendar year is no longer required to match.
gen tag = 1 if ///
persnr == persnr[_n-1] & ///
betnr == betnr[_n-1] & ///
(beruf == beruf[_n-1] | beruf==-1 | beruf[_n-1]==-1) ///
& endorig[_n-1] + 91 >= begorig // allow for 90 day break between spells

* The continuing spell takes over the start date of the spell above; replace
* works top-down, so the start date is carried through chains of tagged spells.
replace begorig = begorig[_n-1] if tag==1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation (-1) or missing education (-7) from the spell
* above, but only when that spell actually carries a known value.
replace beruf = beruf[_n-1] if beruf==-1 & beruf[_n-1]!=-1 & tag==1
replace beh_ausbildung = beh_ausbildung[_n-1] if beh_ausbildung==-7 & beh_ausbildung[_n-1]!=-7 & tag==1
* The spell above is now contained in the combined spell and is removed.
drop if tag[_n+1] == 1
drop tag

*******************************************************************************
* Combine spells over year if same 2-digit-job

sort persnr begorig beruf

* tag == 1 marks a spell that continues the spell directly above it: same
* person, compatible occupation (equal, or one of the two is the unknown
* code -1), and the spell above ended no more than 90 days before this one
* starts. Neither the establishment nor the calendar year has to match.
gen tag = 1 if ///
persnr == persnr[_n-1] & ///
(beruf == beruf[_n-1] | beruf==-1 | beruf[_n-1]==-1) ///
& endorig[_n-1] + 91 >= begorig  // allow for 90 day break between spells

* The continuing spell takes over the start date of the spell above; replace
* works top-down, so the start date is carried through chains of tagged spells.
replace begorig = begorig[_n-1] if tag==1
replace spell_length = endorig - begorig + 1

* Fill an unknown occupation (-1) or missing education (-7) from the spell
* above, but only when that spell actually carries a known value.
replace beruf = beruf[_n-1] if beruf==-1 & beruf[_n-1]!=-1 & tag==1
replace beh_ausbildung = beh_ausbildung[_n-1] if beh_ausbildung==-7 & beh_ausbildung[_n-1]!=-7 & tag==1
* The spell above is now contained in the combined spell and is removed.
drop if tag[_n+1] == 1
drop tag

* Distribution of the combined spell lengths before any length filter.
summarize spell_length, detail

* Accepted training length in days (both bounds inclusive).
local min_days 183
local max_days 1465

capture drop n

* Per person: longest spell, shortest spell and number of spells.
bysort persnr: egen max_training = max(spell_length)
bysort persnr: egen min_training = min(spell_length)
bysort persnr: egen no_trainings  = count(persnr)

* One row per person for the frequency table of spell counts.
by persnr: gen n = _n
tabulate no_trainings if n == 1

* Persons with a single spell are dropped when its length is out of range.
drop if no_trainings == 1 & !inrange(spell_length, `min_days', `max_days')

* Diagnostics only: flag in-range spells, first for single-spell persons,
* then for everyone, and tabulate after each step.
gen keep = no_trainings == 1 & inrange(spell_length, `min_days', `max_days')
tabulate keep
replace keep = 1 if no_trainings > 1 & inrange(spell_length, `min_days', `max_days')
tabulate keep

* Diagnostics only: does a person have at least one in-range spell?
bysort persnr: egen min_keep = max(keep)
capture drop n
bysort persnr: gen n = _n
tabulate min_keep if n == 1
drop min_keep n

* Persons with several spells: first remove persons whose spells are all too
* short or all too long, then remove the remaining out-of-range spells.
drop if no_trainings > 1 & max_training < `min_days'
drop if no_trainings > 1 & min_training > `max_days'

drop if no_trainings > 1 & spell_length < `min_days'
drop if no_trainings > 1 & spell_length > `max_days'

drop min_training max_training no_trainings keep

* Recount the spells that survived the length filter and tabulate the count
* once per person.
capture drop n
bysort persnr: egen no_trainings  = count(persnr)
by persnr: gen n = _n
tabulate no_trainings if n == 1

* Persons with several remaining spells keep only the spell(s) with the
* earliest end date; single-spell persons are left untouched.
bysort persnr: egen earliest_end = min(endorig)
keep if no_trainings <= 1 | endorig == earliest_end

drop earliest_end

* Give the selected spell its final variable names.
rename begorig        train_start
rename endorig        train_end
rename spell_length   train_lenght
rename betnr          train_firm
rename beruf          train_occ
rename beh_ausbildung train_edu
rename age            train_end_age

* Entry date is the training end date; entry year is derived from it.
clonevar day_entry = train_end
gen year_entry = year(day_entry)

* Indicator variables: 1 when the condition holds, 0 otherwise.
gen entry_german = beh_staat_dummy == 1
gen entry_gender = sex_id == 2

* Merge education codes 2 into 1, 4 into 3 and 6 into 5, then label them.
recode train_edu (2 = 1) (4 = 3) (6 = 5)
label define train_edu -7 "Missing" 0 "No degree" 1 "Volks-,Haupt-,Realschule" 3 "Abitur" 5 "Hochschule"
label values train_edu train_edu

* Keep only the variables that go into the output file.
keep persnr train_end train_lenght train_start train_firm train_occ day_entry train_edu year_entry entry_german entry_gender no_trainings train_end_age first_train_beruf spell train_start_age train_start_year train_start_bula train_start_ind

* Indicator: occupation of the selected training equals first_train_beruf.
gen same_train_occ = first_train_beruf == train_occ
tabulate same_train_occ, missing

* Final checks on training length and education.
summarize train_lenght, detail
tabulate train_edu, missing

* Store the result in the smallest possible storage types and save it.
compress
save "$temp/train_end", replace
clear

capture log close
